# yI0-xI0-related-2iv-EMWproposal.R
# Philips, Andrew Q. "How to Avoid Incorrect Inference (While Gaining Correct Ones) in Dynamic Models". Forthcoming at Political Science Research and Methods. 
# andrew.philips@colorado.edu
# 2/9/21
#
# Figures produced in this R script:
#   --SI, Figure 36: "yi0-xi0-related-2iv-evar1-emwproposal.pdf"
#   --SI, Figure 37: "yi0-xi0-related-2iv-evar5-emwproposal.pdf"  
# -----------------------------------
# -----------------------------------
# EXPERIMENT 5: Y ~ I(0), X ~ I(0) and related. Also adding related Z ~ I(0)
# Type II error
# ONLY CALCULATING THE LRE IF L.X (ARDL/ECM) AND X (LDV) ARE FIRST STAT SIG (to appear in SI, Section 2)
# -----------------------------------

rm(list = ls())
setwd("~/Dropbox/My_Folder/Papers-Projects/Wlezien-Enns-PSRM/Final-Feb2021/scripts/")
# -----------------------------------
library(dynamac) 
library(nlWaldTest)
library(ggplot2)
library(grid)
library(gridExtra)
library(dplyr)
library(mvtnorm) # for drawing corr X,Z

set.seed(39201848)
# -------------------------------------------------------------
# EXPERIMENT 5: Y ~ I(0), X ~ I(0), and related. Also adding correlated I(0) Z
sims = 2000 # no. sims
N <- c(50, 250, 1000) # number of obs
e.var <- c(1, 5) # error variance in Y
ar.xz <- 0.5 # autoregression in both X and Z
B.x.true = 2
B.z.true = 1
B.lx.true = c(1, 0, -1)
B.lz.true = 1
phi.true = c(0.2, 0.8)
cov.xz <- c(0.2, 0.75)
# so LRMs are:
(B.x.true + B.lx.true[1])/(1 - phi.true[1]) # 3.75
(B.x.true + B.lx.true[2])/(1 - phi.true[1]) # 2.5
(B.x.true + B.lx.true[3])/(1 - phi.true[1]) # 1.25
(B.x.true + B.lx.true[1])/(1 - phi.true[2]) # 15
(B.x.true + B.lx.true[2])/(1 - phi.true[2]) # 10
(B.x.true + B.lx.true[3])/(1 - phi.true[2]) # 5

container.ardl <- matrix(NA, nrow = sims*length(N)*length(e.var)*length(phi.true)*length(B.lx.true)*length(cov.xz), ncol = 12) # null matrix to hold output
# cols = sims, N, Beta.lx, UL.beta.lx, LL.beta.lx, LR.x, LR.x.UL, LR.x.LL, e.var, phi.true, B.lx.true, cov.xz
container.ecm <- matrix(NA, nrow = sims*length(N)*length(e.var)*length(phi.true)*length(B.lx.true)*length(cov.xz), ncol = 12) # null matrix to hold output
# cols = sims, N, Beta.lx, UL.beta.lx, LL.beta.lx, LR.x, LR.x.UL, LR.x.LL, e.var, phi.true, B.lx.true, cov.xz
container.ldv <- matrix(NA, nrow = sims*length(N)*length(e.var)*length(phi.true)*length(B.lx.true)*length(cov.xz), ncol = 12) # null matrix to hold output
# cols = sims, N, Beta.x, UL.beta.x, LL.beta.x, LR.x, LR.x.UL, LR.x.LL, e.var, phi.true, B.lx.true, cov.xz

row <- 1
prog <- txtProgressBar(min = 0, max = sims, style = 3)
for (s in 1:sims) {
  for (n in N) {
    for (ev in e.var) {
      for (p in phi.true) {
        for (b in B.lx.true) {
          for (cv in cov.xz) {
            setTxtProgressBar(prog, value = s)
            
            # construct x,z:
            error.mat <- mvtnorm::rmvnorm(n = n + 100, mean = c(0,0), sigma = matrix(c(1, cv, cv, 1), nrow = 2)) # define cov matrix of residuals and create matrix
            x <- arima.sim(n = n + 100, model = list(ar = c(ar.xz)), innov = error.mat[,1])
            z <- arima.sim(n = n + 100, model = list(ar = c(ar.xz)), innov = error.mat[,2])
            
            y.error <- rnorm(n + 100, sd = sqrt(ev))
            y <- y.error
            for(i in 2:length(x)) {
              y[i] <- p*y[i-1] + B.x.true*x[i] + b*x[i-1] + B.z.true*z[i] + B.lz.true*z[i-1] + y.error[i]
            }
            
            # ARDL model:
            res.ardl <- lm(y[101:length(y)] ~ lshift(y, 1)[101:length(y)] + x[101:length(x)] + lshift(x, 1)[101:length(x)] + z[101:length(z)] + lshift(z, 1)[101:length(z)])
            # ECM model:
            res.ecm <- lm(dshift(y)[101:length(y)] ~ lshift(y, 1)[101:length(y)] + dshift(x)[101:length(x)] + lshift(x, 1)[101:length(x)] + dshift(z)[101:length(z)] + lshift(z, 1)[101:length(z)])
            # LDV model
            res.ldv <- lm(y[101:length(y)] ~ lshift(y, 1)[101:length(y)] + x[101:length(x)] + z[101:length(z)])
            
            # grab QOI (FORALL): ----
            container.ardl[row, 1] <- container.ecm[row, 1] <- container.ldv[row, 1] <- s # no sims
            container.ardl[row, 2] <- container.ecm[row, 2] <- container.ldv[row, 2] <- n # T
            container.ardl[row, 9] <- container.ecm[row, 9] <- container.ldv[row, 9] <- ev # error variance of Y
            container.ardl[row, 10] <- container.ecm[row, 10] <- container.ldv[row, 10] <- p # true value of phi
            container.ardl[row, 11] <- container.ecm[row, 11] <- container.ldv[row, 11] <- b # true value of effect of l.x
            container.ardl[row, 12] <- container.ecm[row, 12] <- container.ldv[row, 12] <- cv # cov.xz
            
            # QOI (ARDL)
            container.ardl[row, 3] <- coef(res.ardl)[4] # beta.lx
            container.ardl[row, 4] <- confint.default(res.ardl, parm = c(4), level = 0.95)[2] # UL
            container.ardl[row, 5] <- confint.default(res.ardl, parm = c(4), level = 0.95)[1] # LL
            # LR effects (ARDL)
            lrm <- nlConfint(res.ardl, "(a[3] + a[4])/(1-a[2])", level = 0.95)
            container.ardl[row, 6] <- lrm[1] # LRM
            container.ardl[row, 7] <- lrm[3] # LRM UL
            container.ardl[row, 8] <- lrm[2] # LRM LL
            
            # QOI (ECM)
            container.ecm[row, 3] <- coef(res.ecm)[4] # beta.lx
            container.ecm[row, 4] <- confint.default(res.ecm, parm = c(4), level = 0.95)[2] # UL
            container.ecm[row, 5] <- confint.default(res.ecm, parm = c(4), level = 0.95)[1] # LL
            # LR effects (ECM)
            lrm <- nlConfint(res.ecm, "(a[4])/(-a[2])", level = 0.95)
            container.ecm[row, 6] <- lrm[1] # LRM
            container.ecm[row, 7] <- lrm[3] # LRM UL
            container.ecm[row, 8] <- lrm[2] # LRM LL
            
            # QOI (LDV)
            container.ldv[row, 3] <- coef(res.ldv)[3]
            container.ldv[row, 4] <- confint.default(res.ldv, parm = c(3), level = 0.95)[2] # UL
            container.ldv[row, 5] <- confint.default(res.ldv, parm = c(3), level = 0.95)[1] # LL
            # LR effects (LDV)
            lrm <- nlConfint(res.ldv, "(a[3])/(1-a[2])", level = 0.95)
            container.ldv[row, 6] <- lrm[1] # LRM
            container.ldv[row, 7] <- lrm[3] # LRM UL
            container.ldv[row, 8] <- lrm[2] # LRM LL
            
            row <- row + 1
          }
        }
      }
    }
  }
}
#save.image("scenario-yi0-xi-related-2iv-EMWproposal-data.RData")
#load("scenario-yi0-xi-related-2iv-EMWproposal-data.RData")

# Create rejection rates and label:
container.ardl <- as.data.frame(container.ardl)
# cols = sim, N, Beta.lx, UL.beta.lx, LL.beta.lx, LR.x, LR.x.UL, LR.x.LL, e.var, phi, B.lx.true, cov.xz
colnames(container.ardl) <- c("sims", "Time", "Beta.lx", "Beta.lx.ul", "Beta.lx.ll", "LR.x", "LR.x.UL", "LR.x.LL", "e.var", "phi", "B.lx.true", "cov.xz")
container.ecm <- as.data.frame(container.ecm)
# cols = sim, N, Beta.lx, UL.beta.lx, LL.beta.lx, LR.x, LR.x.UL, LR.x.LL, e.var, phi, B.lx.true, cov.xz
colnames(container.ecm) <- c("sims", "Time", "Beta.lx", "Beta.lx.ul", "Beta.lx.ll", "LR.x", "LR.x.UL", "LR.x.LL", "e.var", "phi", "B.lx.true", "cov.xz")
container.ldv <- as.data.frame(container.ldv)
# cols = sim, N, Beta.x, UL.beta.x, LL.beta.x, LR.x, LR.x.UL, LR.x.LL, e.var, phi, B.lx.true, cov.xz
colnames(container.ldv) <- c("sims", "Time", "Beta.x", "Beta.x.ul", "Beta.x.ll", "LR.x", "LR.x.UL", "LR.x.LL", "e.var", "phi", "B.lx.true", "cov.xz")

# proportion rejecting beta.lx = 0 (ARDL/ECM) or beta.x = 0 (LDV)
container.ardl$reject.beta.lx <- ifelse(container.ardl$Beta.lx.ll > 0 | container.ardl$Beta.lx.ul < 0, 1, 0)
container.ecm$reject.beta.lx <- ifelse(container.ecm$Beta.lx.ll > 0 | container.ecm$Beta.lx.ul < 0, 1, 0)
container.ldv$reject.beta.x <- ifelse(container.ldv$Beta.x.ll > 0 | container.ldv$Beta.x.ul < 0, 1, 0)

# proportion rejection, LR (from truth; i.e., = 1 if constructed 95% CI's DON'T encompass true LRM):
container.ardl$reject.lrm.truth <- ifelse(container.ardl$LR.x.LL > ((B.x.true + container.ardl$B.lx.true)/(1-container.ardl$phi)) | container.ardl$LR.x.UL < ((B.x.true + container.ardl$B.lx.true)/(1-container.ardl$phi)), 1, 0)
container.ecm$reject.lrm.truth <- ifelse(container.ecm$LR.x.LL > ((B.x.true + container.ecm$B.lx.true)/(1-container.ecm$phi)) | container.ecm$LR.x.UL < ((B.x.true + container.ecm$B.lx.true)/(1-container.ecm$phi)), 1, 0)
container.ldv$reject.lrm.truth <- ifelse(container.ldv$LR.x.LL > ((B.x.true + container.ldv$B.lx.true)/(1-container.ldv$phi)) | container.ldv$LR.x.UL < ((B.x.true + container.ldv$B.lx.true)/(1-container.ldv$phi)), 1, 0)

# Coding of rejection of LRM:
# = 1 if we reject LRM = truth AND also reject beta.lx/beta.x = 0, 0 otherwise
container.ardl$reject.lrm.yes.lx <- ifelse(container.ardl$reject.lrm.truth == 1 & container.ardl$reject.beta.lx == 1, 1, 0)
container.ecm$reject.lrm.yes.lx <- ifelse(container.ecm$reject.lrm.truth == 1 & container.ecm$reject.beta.lx == 1, 1, 0)
container.ldv$reject.lrm.yes.x <- ifelse(container.ldv$reject.lrm.truth == 1 & container.ldv$reject.beta.x == 1, 1, 0)

# = 1 if we don't reject LRM and also don't reject beta.lx/beta.x
container.ardl$reject.lrm.no.lx.no <- ifelse(container.ardl$reject.lrm.truth == 0 & container.ardl$reject.beta.lx == 0, 1, 0)
container.ecm$reject.lrm.no.lx.no <- ifelse(container.ecm$reject.lrm.truth == 0 & container.ecm$reject.beta.lx == 0, 1, 0)
container.ldv$reject.lrm.no.x.no <- ifelse(container.ldv$reject.lrm.truth == 0 & container.ldv$reject.beta.x == 0, 1, 0)

# = 1 if we reject LRM  = truth AND NOT reject beta.lx/beta.x = 0, 0 otherwise
container.ardl$reject.lrm.no.lx <- ifelse(container.ardl$reject.lrm.truth == 1 & container.ardl$reject.beta.lx == 0, 1, 0)
container.ecm$reject.lrm.no.lx <- ifelse(container.ecm$reject.lrm.truth == 1 & container.ecm$reject.beta.lx == 0, 1, 0)
container.ldv$reject.lrm.no.x <- ifelse(container.ldv$reject.lrm.truth == 1 & container.ldv$reject.beta.x == 0, 1, 0)

# collapse everything down:
container.collapse.ardl <- container.ardl %>%
  group_by(Time, e.var, phi, B.lx.true, cov.xz) %>%
  summarize(reject.beta.lx = mean(reject.beta.lx),
            reject.lrm.truth = mean(reject.lrm.truth),
            reject.lrm.yes.lx = mean(reject.lrm.yes.lx),
            reject.lrm.no.lx = mean(reject.lrm.no.lx),
            reject.lrm.no.lx.no = mean(reject.lrm.no.lx.no))

container.collapse.ecm <- container.ecm %>%
  group_by(Time, e.var, phi, B.lx.true, cov.xz) %>%
  summarize(reject.beta.lx = mean(reject.beta.lx),
            reject.lrm.truth = mean(reject.lrm.truth),
            reject.lrm.yes.lx = mean(reject.lrm.yes.lx),
            reject.lrm.no.lx = mean(reject.lrm.no.lx),
            reject.lrm.no.lx.no = mean(reject.lrm.no.lx.no))

container.collapse.ldv <- container.ldv %>%
  group_by(Time, e.var, phi, B.lx.true, cov.xz) %>%
  summarize(reject.beta.x = mean(reject.beta.x), 
            reject.lrm.truth = mean(reject.lrm.truth),
            reject.lrm.yes.lx = mean(reject.lrm.yes.x), # need to label this as lx for appending, even though it's x
            reject.lrm.no.lx = mean(reject.lrm.no.x),
            reject.lrm.no.lx.no = mean(reject.lrm.no.x.no)) # need to label this as lx for appending, even though it's x

# need to combine all the model datasets since geom_bar doesn't work on separate datasets
container.collapse.ardl$model.type <- "ARDL"
container.collapse.ecm$model.type <- "ECM"
container.collapse.ldv$model.type <- "LDV"

# Create long-run appended dataset:
# long run: time (1), e.var (2), phi (3), B.lx.true (4), cov.xz (5), reject.lrm.truth (7), reject.lrm.yes.lx (8), reject.lrm.no (9), reject.lrm.no.lx.no (10), model type (11) 
container.collapse.all.LR <- rbind(container.collapse.ardl[c(1,2,3,4,5,7,8,9,10,11)], container.collapse.ecm[c(1,2,3,4,5,7,8,9,10,11)], container.collapse.ldv[c(1,2,3,4,5,7,8,9,10,11)])
container.collapse.all.LR$Time <- as.factor(container.collapse.all.LR$Time)
container.collapse.all.LR$phi <- as.factor(container.collapse.all.LR$phi)
container.collapse.all.LR$e.var <- as.factor(container.collapse.all.LR$e.var)
container.collapse.all.LR$cov.xz <- as.factor(container.collapse.all.LR$cov.xz)
container.collapse.all.LR$B.lx.true <- as.factor(container.collapse.all.LR$B.lx.true)
container.collapse.all.LR$mc.type[container.collapse.all.LR$phi == .8 & container.collapse.all.LR$B.lx.true == 1] <- "Alpha = 0.8, Lag X effect = 1"
container.collapse.all.LR$mc.type[container.collapse.all.LR$phi == .8 & container.collapse.all.LR$B.lx.true == 0] <- "Alpha = 0.8, Lag X effect = 0"
container.collapse.all.LR$mc.type[container.collapse.all.LR$phi == .8 & container.collapse.all.LR$B.lx.true == -1] <- "Alpha = 0.8, Lag X effect = -1"
container.collapse.all.LR$mc.type[container.collapse.all.LR$phi == .2 & container.collapse.all.LR$B.lx.true == 1] <- "Alpha = 0.2, Lag X effect = 1"
container.collapse.all.LR$mc.type[container.collapse.all.LR$phi == .2 & container.collapse.all.LR$B.lx.true == 0] <- "Alpha = 0.2, Lag X effect = 0"
container.collapse.all.LR$mc.type[container.collapse.all.LR$phi == .2 & container.collapse.all.LR$B.lx.true == -1] <- "Alpha = 0.2, Lag X effect = -1"
table(container.collapse.all.LR$mc.type)

# plot as a proportion out of all times we would have correctly covered the LRE:
container.collapse.all.LR$prop.miss.pctcorrectcov <- container.collapse.all.LR$reject.lrm.no.lx.no/(1 - container.collapse.all.LR$reject.lrm.truth)

p1 <- ggplot(data = subset(container.collapse.all.LR, e.var == 1 & cov.xz == 0.2), aes(x = model.type, y = prop.miss.pctcorrectcov, fill = Time)) + geom_bar(stat = "identity", position = position_dodge()) + theme_minimal() + scale_fill_brewer(palette="Oranges")  + ylab("Proportion Missed (out of correct coverage)") + xlab("Model") + ggtitle("Long-Run, Cov(x,z) = 0.2") + facet_wrap(~mc.type) # we get some missings b/c dividing by 0 for LDV

p2 <- ggplot(data = subset(container.collapse.all.LR, e.var == 1 & cov.xz == 0.75), aes(x = model.type, y = prop.miss.pctcorrectcov, fill = Time)) + geom_bar(stat = "identity", position = position_dodge()) + theme_minimal() + scale_fill_brewer(palette="Oranges") +  ylab("Proportion Missed (out of correct coverage)") + xlab("Model") + ggtitle("Long-Run, Cov(x,z) = 0.75") + facet_wrap(~mc.type) 

pdf("yi0-xi0-related-2iv-evar1-emwproposal.pdf", width = 1.618*7, height = 7)
grid.arrange(p1, p2, ncol = 1)
dev.off()

p1 <- ggplot(data = subset(container.collapse.all.LR, e.var == 5 & cov.xz == 0.2), aes(x = model.type, y = prop.miss.pctcorrectcov, fill = Time)) + geom_bar(stat = "identity", position = position_dodge()) + theme_minimal() + scale_fill_brewer(palette="Oranges") +  ylab("Proportion Missed (out of correct coverage)") + xlab("Model") + ggtitle("Long-Run, Cov(x,z) = 0.2") + facet_wrap(~mc.type) 

p2 <- ggplot(data = subset(container.collapse.all.LR, e.var == 5 & cov.xz == 0.75), aes(x = model.type, y = prop.miss.pctcorrectcov, fill = Time)) + geom_bar(stat = "identity", position = position_dodge()) + theme_minimal() + scale_fill_brewer(palette="Oranges") +  ylab("Proportion Missed (out of correct coverage)") + xlab("Model") + ggtitle("Long-Run, Cov(x,z) = 0.75") + facet_wrap(~mc.type) 

pdf("yi0-xi0-related-2iv-evar5-emwproposal.pdf", width = 1.618*7, height = 7)
grid.arrange(p1, p2, ncol = 1)
dev.off()
